The Ocean Conservancy’s Urban Ocean program has been developing projects that mitigate marine pollution, assess waste management, and enable cities to address ocean plastics and resilience. These projects intend to deploy “zero-waste” pilot solutions in multiple cities where they have partnerships and have collected waste management information. Our objective is to develop a litter-accumulation assessment model that identifies effective zero-waste site locations for national and multinational use. This geospatial risk assessment model serves the purpose of predicting litter accumulation based on globally sourced data with a repeatable framework on an international scale. The results of the model evaluate which sections of a given area have the highest likelihood to produce and contain litter relative to its surroundings. For our case, the Urban Ocean program intends to dedicate “zero-waste” solutions in these areas for the most effective impact for each deployment.
For our model, we directed our focus on various cities that are part of the Resilient Cities Network. The cities we display in this demonstration are Chennai, India; Bangkok, Thailand; and Santiago, Chile. The data regarding each region and its attributes was sourced from OpenStreetMap (OSM) to ensure a repeatable framework and quality consistency. For each selected city we developed a ‘boundary box’ automated by OSM simply by specifying the name of the area (city & nation) and using the output coordinates as the scope of data acquisition. From there, further attributes of the area with respect to structures, networks, or political divisions can be extracted for use.
In terms of our dependent variable, litter data, the process required manually downloading data from Marine Debris Tracker (https://www.debristracker.org/data/). Each selected city required its own evaluation in terms of quantitative debris data over a several-year time span. Ideal locations featured several thousand data points across 2-3 years for early-stage model development. Despite the scalability of the model itself, results and accuracy remain contingent on data quantity and external factors.
# --- Chennai: load litter data, build fishnet, and compute spatial features ---
# Load Marine Debris Tracker records for Chennai (manually exported CSV).
litter <- read.csv('https://raw.githubusercontent.com/TrevorKap/MUSA810-Marine-Pollution/main/Data/mdt-dataChennai.csv')

# Keep plastic debris only, promote to point geometry (lon/lat, WGS 84),
# then project to UTM zone 43N (EPSG:32643) so distances are in meters.
litter_p <- litter %>%
  filter(master_material == 'PLASTIC') %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326, agr = "constant") %>%
  st_transform('EPSG:32643')

# Optional population raster (local path; re-enable when the file is available).
#img <- raster("/Users/mr.smile/Desktop/UPENN/Spring24/CPLN790/data/population_ind_pak_general/population_10_lon_80_general-v1.5.tif")

# Read the city boundary ONCE (the original fetched the same KML twice) and
# keep two copies:
#  - temp_bd   : geographic coordinates (EPSG:4326), required by get_bbox()
#  - chen_bdry : projected to meters (EPSG:32643), required by create_fish()
## Reading layer `GCC-Divisions': 200 MULTIPOLYGON features, Geodetic CRS WGS 84
temp_bd <- st_read('https://github.com/TrevorKap/MUSA810-Marine-Pollution/raw/main/Data/Chennai.kml')
chen_bdry <- st_set_crs(temp_bd, 4326) %>%
  st_transform('EPSG:32643')

temp_bbox <- get_bbox(temp_bd)                  # bounding box (expects EPSG:4326)
temp_fish <- create_fish(chen_bdry)             # fishnet grid over the city (meter CRS)
final_net <- countfishnet(temp_fish, litter_p)  # base fishnet with litter counts
# NOTE(review): pn_gen() is called with stor_df only, yet the original comment
# says it "adds OSM point data into the final dataset" -- presumably pn_gen()
# reads final_net from the enclosing environment; confirm it does not drop it.
final_net <- pn_gen(stor_df)                    # add OSM point data + knn features
# Population steps (disabled until the raster above is available):
#temp_point <- raster_process(img,temp_bd) # convert the raster file to point one
#pop_result <- pop_process(temp_point, temp_fish, 32643) # summary the population result
#final_net <- add_pop(pop_result,final_net) # add the pop result into the final dataset
final_net <- moran_gen(final_net,stor_df)       # add local Moran's I statistics

chen_net <- final_net  # snapshot the Chennai net before final_net is reused

# --- Bangkok: load litter data (processing continues below) ---
litter <- read.csv('https://raw.githubusercontent.com/TrevorKap/MUSA810-Marine-Pollution/main/Data/mdt-dataBangkok.csv')
# --- Bangkok: filter litter, build fishnet, and compute spatial features ---
litter_b <- litter %>%
  filter(master_material == 'PLASTIC') %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326, agr = "constant") %>%
  st_transform('EPSG:32643')
# Drop free-text columns that are not used as model features.
litter_b <- subset(litter_b, select = -c(event_name, project_name))

# Read the boundary ONCE (the original fetched the same KML twice) and keep a
# geographic copy for get_bbox() plus a projected copy for create_fish().
## Reading layer `Bangkok': 1 POLYGON feature, Geodetic CRS WGS 84
bok_bd <- st_read('https://github.com/TrevorKap/MUSA810-Marine-Pollution/raw/main/Data/Bangkok.kml')
bok_bdry <- st_set_crs(bok_bd, 4326) %>%
  st_transform('EPSG:32643')
# NOTE(review): EPSG:32643 is UTM zone 43N (India); Bangkok lies in zone 47N
# (EPSG:32647). The shared CRS keeps the three city nets rbind-compatible
# later on, but distances for Bangkok are distorted -- confirm this is intended.

temp_bbox <- get_bbox(bok_bd)                   # bounding box (expects EPSG:4326)
temp_fish <- create_fish(bok_bdry)              # fishnet grid over the city (meter CRS)
final_net <- countfishnet(temp_fish, litter_b)  # base fishnet with litter counts
final_net <- pn_gen(stor_df)                    # add OSM point data + knn features
final_net <- moran_gen(final_net,stor_df)       # add local Moran's I statistics
bok_net <- final_net  # snapshot the Bangkok net before final_net is reused

# --- Santiago: load litter data (processing continues below) ---
litter <- read.csv('https://raw.githubusercontent.com/TrevorKap/MUSA810-Marine-Pollution/main/Data/mdt-dataSantiago.csv')
# --- Santiago: filter litter, build fishnet, then label and combine cities ---
litter_s <- litter %>%
  filter(master_material == 'PLASTIC') %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326, agr = "constant") %>%
  st_transform('EPSG:32643')
# Drop free-text columns that are not used as model features.
litter_s <- subset(litter_s, select = -c(event_name, project_name))

# Load the boundary; keep a geographic copy for get_bbox() and a projected
# copy for create_fish().
## Reading layer `Santiago': 1 POLYGON feature, Geodetic CRS WGS 84
san_bdry <- st_read('https://github.com/TrevorKap/MUSA810-Marine-Pollution/raw/main/Data/Santiago.kml')
san_bd <- san_bdry                           # EPSG:4326 copy for get_bbox()
san_bdry <- st_set_crs(san_bdry, 4326) %>%
  st_transform('EPSG:32643')
# NOTE(review): EPSG:32643 is UTM zone 43N (India); Santiago lies in zone 19S
# (EPSG:32719). The shared CRS is what lets rbind() below stack the three
# nets, but distances for Santiago are heavily distorted -- confirm intent.

temp_bbox <- get_bbox(san_bd)                   # bounding box (expects EPSG:4326)
temp_fish <- create_fish(san_bdry)              # fishnet grid over the city (meter CRS)
final_net <- countfishnet(temp_fish, litter_s)  # base fishnet with litter counts
final_net <- pn_gen(stor_df)                    # add OSM point data + knn features
final_net <- moran_gen(final_net,stor_df)       # add local Moran's I statistics
san_net <- final_net  # snapshot the Santiago net

# Label each city's net, then stack them into one modelling dataset.
chen_net <- chen_net %>%
  # dplyr::select(!c(avg_pop ,sum_pop))%>%   # population columns (disabled)
  mutate(city = 'Chennai',
         country = 'India')
san_net <- san_net %>%
  mutate(city = 'Santiago',
         country = 'Chile')
bok_net <- bok_net %>%
  mutate(city = 'Bangkok',
         country = 'Thailand')
tt_net <- rbind(chen_net, san_net, bok_net) %>%
  mutate(uniqueID = row_number())   # stable per-row id (idiomatic form of 1:n())
# Reference: https://wiki.openstreetmap.org/wiki/Map_features#Entertainment,_Arts_&_Culture
# OSM tag values used when querying point features for the fishnet.
leisure <- c('park')    # leisure= tag values of interest
act <- c('maxspeed')    # highway attribute used for road queries

The litter data featured a wide variety of item categories and general information about each piece. Despite the quality of detail for each recorded sample, this only accounts for litter that has been identified, recorded, and disposed of. The data does not account for litter that was identified but never disposed of or recorded. Regardless, in areas where no litter was recorded, litter could not confidently be assumed to be either present or absent.
Each chart below visualizes an independent variable reformatted for repeatable use in a statistical context that better represents its relation to litter. The examples are restaurants, roads, retail proximity, and significant presence of restaurants. Our variable selection was based on our hypothesis that areas of human activity lead to higher litter risk. The aim of these variables is to act as proxies for litter cases, providing insight on where litter most likely accumulates or ends up. These variables, similarly to litter, are analyzed on a fishnet grid where the quantity is counted per cell. Each variable is then placed on a combined fishnet grid with the litter data, computed using a Chi-Squared test, and evaluated for association between the variables and litter.
Restaurants are used as a point of significance because of their relation to commercial activity. We wanted to examine a ‘significance’ analysis because of their direct implication with high human activity, excess flow of goods, and interconnection of urban systems. Areas of low distances of a “nearest neighbor” imply restaurants are close to one another geographically, indicate density in resources, and identify an urban core.
# Knit-time warnings condensed for readability:
# - ggplot2: the `size` argument of element_rect() is deprecated since 3.4.0;
#   use `linewidth` instead (emitted inside the plotting helpers).
# - tidy-eval: `net_one[[variable]]` is discouraged; prefer `.data[[variable]]`
#   inside aes() (emitted inside visual_count()/visual_cotinuous()).

# Per-variable maps. Only one continuous map and one count map are rendered;
# the commented alternatives are kept for exploration.
# NOTE(review): the helper is spelled `visual_cotinuous` (sic) where it is
# defined elsewhere -- do not "fix" the call name without renaming the helper.
#visual_count(bok_net,'industrial')
#visual_count(chen_net,"residential")
#visual_count(chen_net,'retail')
#visual_cotinuous(chen_net,'waste_nn')
#visual_cotinuous(san_net,'water_nn')
#visual_cotinuous(san_net,"restaurant_nn")
#visual_cotinuous(san_net,'road_nn')
#visual_cotinuous(san_net,'industrial_nn')
#visual_cotinuous(san_net,"residential_nn")
visual_cotinuous(san_net, 'retail_nn')    # distance to nearest retail (Santiago)
#visual_count(bok_net,'waste_sig')
#visual_count(bok_net,'water_sig')
visual_count(bok_net, "restaurant_sig")   # significant restaurant presence (Bangkok)
#visual_count(bok_net,'road_sig')
#visual_count(bok_net,'industrial_sig')
#visual_count(bok_net,"residential_sig")
#visual_count(bok_net,'retail_sig')
#visual_cotinuous(bok_net,'waste_sig_dis')
#visual_cotinuous(bok_net,'water_sig_dis')
#visual_cotinuous(bok_net,"restaurant_sig_dis")
#visual_cotinuous(bok_net,'road_sig_dis')
#visual_cotinuous(bok_net,'industrial_sig_dis')
#visual_cotinuous(bok_net,"residential_sig_dis")
#visual_cotinuous(bok_net,'retail_sig_dis')

The following correlation analysis shows the various features of our model. This matrix allows for further PCA analysis and variable selection.
# Correlation matrix of the Chennai features (geometry and label columns removed).
cor_chen <- st_drop_geometry(chen_net) %>%
  dplyr::select(!c(uniqueID, cvID, city, country))
cor_nor_chen <- scale(cor_chen)   # z-score each feature before correlating
corr_matrix <- cor(cor_nor_chen)
ggcorrplot(corr_matrix)

The PCA analysis below is used to identify the most important variables in the dataset and their correlation with litter. We perform PCA to reduce the dimensionality of the variable set. The result can be interpreted through the color and direction of the arrows in the last map, and that PCA map is the basis for choosing the final variables. Based on the result, the selected variables included ‘waste_sig_dis, restaurant_sig_dis, residential_sig_dis, water_sig_dis, residential_nn, industrial_sig_dis, industrial_sig, restaurant_sig, industrial_nn, road_sig_dis, residential_sig, restaurant_sig, restaurant’ as the ‘shortened model’ independent variables.
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.3437918 0.45714552 0.34999676 0.31651721 0.28298158
## Proportion of Variance 0.6718611 0.07775431 0.04557678 0.03727436 0.02979421
## Cumulative Proportion 0.6718611 0.74961541 0.79519218 0.83246654 0.86226075
## Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## Standard deviation 0.2604250 0.23294154 0.19522931 0.18214261 0.17437583
## Proportion of Variance 0.0252337 0.02018875 0.01418096 0.01234351 0.01131327
## Cumulative Proportion 0.8874944 0.90768320 0.92186415 0.93420766 0.94552093
## Comp.11 Comp.12 Comp.13 Comp.14
## Standard deviation 0.17260785 0.137064350 0.132788962 0.122088555
## Proportion of Variance 0.01108502 0.006989797 0.006560538 0.005545816
## Cumulative Proportion 0.95660596 0.963595753 0.970156291 0.975702107
## Comp.15 Comp.16 Comp.17 Comp.18
## Standard deviation 0.117605399 0.113017395 0.108573326 0.094784911
## Proportion of Variance 0.005146003 0.004752325 0.004385931 0.003342673
## Cumulative Proportion 0.980848110 0.985600435 0.989986366 0.993329040
## Comp.19 Comp.20 Comp.21 Comp.22
## Standard deviation 0.092298105 0.068434314 0.052043716 0.0345899530
## Proportion of Variance 0.003169575 0.001742462 0.001007748 0.0004451593
## Cumulative Proportion 0.996498615 0.998241077 0.999248826 0.9996939851
## Comp.23 Comp.24 Comp.25 Comp.26
## Standard deviation 0.0223501262 1.339154e-02 1.050276e-02 4.780985e-03
## Proportion of Variance 0.0001858555 6.672318e-05 4.104142e-05 8.504530e-06
## Cumulative Proportion 0.9998798406 9.999466e-01 9.999876e-01 9.999961e-01
## Comp.27 Comp.28 Comp.29
## Standard deviation 3.071381e-03 1.011205e-03 5.138865e-09
## Proportion of Variance 3.509804e-06 3.804470e-07 9.825393e-18
## Cumulative Proportion 9.999996e-01 1.000000e+00 1.000000e+00
## Comp.1 Comp.2
## count 0.057471658 0.032678686
## water 0.066462846 0.077170737
## water_nn -0.197605679 0.150752995
## waste 0.057538240 0.008024004
## waste_nn -0.325388787 -0.047608165
## restaurant 0.132036365 0.016882487
## restaurant_nn -0.303620791 0.095307986
## road 0.070157386 -0.059669753
## road_nn -0.112503900 0.098354361
## industrial -0.001935786 0.392288795
## industrial_nn 0.027441387 -0.463179747
## residential 0.148690605 -0.044042186
## residential_nn -0.217478666 0.021856831
## retail 0.098422015 -0.014419925
## retail_nn -0.309812717 -0.099310380
## water_sig 0.098990721 0.151531165
## water_sig_dis -0.219042688 0.083002483
## waste_sig 0.093078608 0.020332085
## waste_sig_dis -0.321334844 -0.048197790
## restaurant_sig 0.140222065 0.034350205
## restaurant_sig_dis -0.314004845 -0.134647238
## road_sig -0.053967930 0.050049355
## road_sig_dis 0.108355048 -0.220262098
## industrial_sig 0.027500200 0.328696066
## industrial_sig_dis -0.048589728 -0.534682716
## residential_sig 0.146234447 -0.049609238
## residential_sig_dis -0.306025234 0.208932511
## retail_sig 0.131280271 -0.009468901
## retail_sig_dis -0.320952438 -0.146133128
# PCA variable map: arrow direction groups correlated variables; colour (cos2)
# shows how well each variable is represented by the first two components.
fviz_pca_var(data.pca, col.var = "cos2",
             gradient.cols = c("black", "orange", "green"),
             repel = TRUE)

In light of the inherent bias stemming from the litter data, we are also developing a ‘Reduced Bias’ model in an attempt to compensate for potential assumptions made by the model in terms of quantity. We take each indicator and coalesce them into a single variable used for our model to compare litter data to the indicators.
# Prepare the modelling data: drop geometry and the CV fold id, one-hot
# encode city/country, and add a stable row id.
df_model <- st_drop_geometry(tt_net) %>% dplyr::select(!cvID)
df_model <- dummy_cols(df_model, select_columns = "city")
df_model <- dummy_cols(df_model, select_columns = "country")
df_model <- df_model %>%
  dplyr::select(!c(city, country)) %>%
  mutate(uniqueID = row_number())

# Candidate models, all predicting the litter count per fishnet cell.
# uniqueID is excluded once here so it is never used as a predictor.
mdl_df <- df_model %>% dplyr::select(!uniqueID)
temp.rf <- randomForest(count ~ ., data = mdl_df, mtry = 10, ntree = 70,
                        importance = TRUE, na.action = na.omit)
# (knit output: "Warning: glm.fit: fitted rates numerically 0 occurred")
temp.lr <- glm(count ~ ., data = mdl_df, family = "poisson", na.action = na.omit)
temp.lr.qs <- glm(count ~ ., data = mdl_df, family = "quasi", na.action = na.omit)
temp.lr.qp <- glm(count ~ ., data = mdl_df, family = "quasipoisson", na.action = na.omit)
nn_model <- neuralnet(count ~ ., data = mdl_df, hidden = c(5, 2),
                      linear.output = TRUE)
# Hierarchical Bayesian regression (Stan via brms). fit.1 is kept as an alias
# because later (currently commented-out) code refers to it.
temp.hbr <- fit.1 <- brm(count ~ ., data = mdl_df, family = gaussian(),
                         warmup = 500, iter = 1000, chains = 2, cores = 2,
                         seed = 1115)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 15.0.0 (clang-1500.0.40.1)’
## using SDK: ‘MacOSX14.0.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## #include <cmath>
## ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Warning: There were 1000 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 2.97, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
The following results from each of the models were added to the dataframe. We test a range of statistical models for a comparative analysis, including random forest, linear model, quasi-Poisson, and hierarchical Bayes regression. This section also includes a decision tree model. While this model has a strong visualization of the risk area, its accuracy was not strong.
# Decision tree baseline: 75/25 train/test split, depth-4 regression tree.
set.seed(123)   # reproducible split
data_split <- initial_split(df_model, prop = 0.75)
train_data <- training(data_split)
test_data <- testing(data_split)
# Create a decision tree model specification and fit it to the training data.
tree_spec <- decision_tree(mode = "regression", tree_depth = 4, engine = 'rpart')
tree_fit <- tree_spec %>%
  fit(count ~ ., data = train_data)
# Held-out performance (knit output: rmse 9.99, rsq 9.09e-5 -- very weak fit,
# matching the "accuracy was not strong" note above).
predictions <- tree_fit %>%
  predict(test_data) %>%
  pull(.pred)
metrics <- metric_set(rmse, rsq)
model_performance <- test_data %>%
  mutate(predictions = predictions) %>%
  metrics(truth = count, estimate = predictions)
print(model_performance)
# Re-predict over the full dataset for mapping.
predictions <- tree_fit %>%
  predict(df_model) %>%
  pull(.pred)
df_dt_rst <- df_model %>%
  mutate(Prediction = predictions) %>%
  dplyr::select(uniqueID, count, Prediction)
# NOTE(review): at this point final_net holds whichever city's net was built
# last (Santiago), while df_model spans all three cities -- confirm this join
# should not use tt_net instead.
df_dt_rst <- left_join(final_net %>% dplyr::select(uniqueID), df_dt_rst, by = "uniqueID")
#risk_v(df_dt_rst,litter_p,"kmeans")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Prediction = predict(model, dataset, type = "response")`.
## Caused by warning in `predict.lm()`:
## ! prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Prediction = predict(model, dataset, type = "response")`.
## Caused by warning in `predict.lm()`:
## ! prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Post-process neural-net and Bayesian predictions, then join every model's
# result back onto the multi-city fishnet by uniqueID.
df_nn_rst <- model_process(df_model, nn_model)
df_brm_rst <- model_process(df_model, temp.hbr)
# brm() predictions arrive as a per-row vector whose first element is kept
# (presumably the posterior Estimate -- TODO confirm against model_process()).
# Replaces the original grow-a-list for-loop with a typed vapply.
df_brm_rst$Prediction <- vapply(
  df_brm_rst$Prediction,
  function(p) as.numeric(p[[1]]),
  numeric(1)
)
df_brm_rst <- df_brm_rst %>%
  dplyr::select(uniqueID, count, Prediction)
# Alternative long-format summary path (disabled):
#df_brm_r <- model_result(df_model,fit.1) %>%mutate(model = 'BRM')
#df_rf_r <- model_result(df_model,temp.rf) %>%mutate(model = 'RF')
#df_lr_r <- model_result(df_model,temp.lr) %>%mutate(model = 'LR')
#df_lrqs_r <- model_result(df_model,temp.lr.qs)%>%mutate(model = 'LR_quasi')
#df_lrqp_r <- model_result(df_model,temp.lr.qp)%>%mutate(model = 'LR_quasipoisson')
#df_nn_r <- model_result(df_model,nn_model)%>%mutate(model = 'Neural Net')
#df_r_tt <- do.call("rbind", list(df_rf_r, df_lr_r, df_lrqs_r,df_lrqp_r,df_nn_r))
# NOTE(review): df_rf_rst, df_lr_rst, df_lrqs_rst and df_lrqp_rst are never
# created in the code shown here -- presumably built by model_process() in a
# chunk not visible in this file; verify before running top-to-bottom.
df_rf_rst <- left_join(tt_net %>% dplyr::select(uniqueID, city), df_rf_rst, by = "uniqueID")
df_lr_rst <- left_join(tt_net %>% dplyr::select(uniqueID, city), df_lr_rst, by = "uniqueID")
df_lrqs_rst <- left_join(tt_net %>% dplyr::select(uniqueID, city), df_lrqs_rst, by = "uniqueID")
df_lrqp_rst <- left_join(tt_net %>% dplyr::select(uniqueID, city), df_lrqp_rst, by = "uniqueID")
df_brm_rst <- left_join(tt_net %>% dplyr::select(uniqueID, city), df_brm_rst, by = "uniqueID")

For our various models, we have tested them on Bangkok as well. The results are displayed below. Each model has a different level of sensitivity.
Below shows the various models for Chennai and their predictions of litter risk across 5 categories of intensity.
# Chennai: litter-risk maps (5 kmeans intensity classes), one panel per model.
df_rf_c <- df_rf_rst %>% filter(city == 'Chennai')
df_lr_c <- df_lr_rst %>% filter(city == 'Chennai')
df_lrqs_c <- df_lrqs_rst %>% filter(city == 'Chennai')
df_lrqp_c <- df_lrqp_rst %>% filter(city == 'Chennai')
df_brm_c <- df_brm_rst %>% filter(city == 'Chennai')
grid.arrange(
  risk_v(df_rf_c, litter_p, "kmeans", 'random forest'),
  risk_v(df_lr_c, litter_p, "kmeans", 'linear regression'),
  risk_v(df_lrqs_c, litter_p, "kmeans", 'Lr-quasi'),
  risk_v(df_lrqp_c, litter_p, "kmeans", 'Lr-quasipoisson'),  # label typo fixed
  risk_v(df_brm_c, litter_p, "kmeans", 'HrB regression'),
  nrow = 2
)

Below shows the various models for Bangkok and their predictions of litter risk across 5 categories of intensity.
# Bangkok: litter-risk maps (5 kmeans intensity classes), one panel per model.
df_rf_b <- df_rf_rst %>% filter(city == 'Bangkok')
df_lr_b <- df_lr_rst %>% filter(city == 'Bangkok')
df_lrqs_b <- df_lrqs_rst %>% filter(city == 'Bangkok')
df_lrqp_b <- df_lrqp_rst %>% filter(city == 'Bangkok')
df_brm_b <- df_brm_rst %>% filter(city == 'Bangkok')
grid.arrange(
  risk_v(df_rf_b, litter_b, "kmeans", 'random forest'),
  risk_v(df_lr_b, litter_b, "kmeans", 'linear regression'),
  risk_v(df_lrqs_b, litter_b, "kmeans", 'Lr-quasi'),
  risk_v(df_lrqp_b, litter_b, "kmeans", 'Lr-quasipoisson'),  # label typo fixed
  risk_v(df_brm_b, litter_b, "kmeans", 'HrB regression'),
  nrow = 2
)

The following comparative figures visualize the strength of the model as it is used across cities.
# Cross-city comparison: the same model side by side for Chennai and Bangkok.
# (The fused `)grid.arrange(` calls from the knit are split back into separate
# statements; Santiago panels remain disabled.)
grid.arrange(
  #risk_v(df_rf_s,litter_s,'kmeans'),
  risk_v(df_rf_c, litter_p, 'kmeans', 'random forest'),
  risk_v(df_rf_b, litter_b, 'kmeans', 'random forest'),
  nrow = 1)
grid.arrange(
  #risk_v(df_lr_s,litter_s,'kmeans'),
  risk_v(df_lr_c, litter_p, 'kmeans', 'linear regression'),
  risk_v(df_lr_b, litter_b, 'kmeans', 'linear regression'),
  nrow = 1)
grid.arrange(
  #risk_v(df_lrqs_s,litter_s,'kmeans'),
  risk_v(df_lrqs_c, litter_p, 'kmeans', 'Lr-quasi'),
  risk_v(df_lrqs_b, litter_b, 'kmeans', 'Lr-quasi'),
  nrow = 1)
grid.arrange(
  #risk_v(df_lrqp_s,litter_s,'kmeans'),
  risk_v(df_lrqp_c, litter_p, 'kmeans', 'Lr-quasipoisson'),  # label typo fixed
  risk_v(df_lrqp_b, litter_b, 'kmeans', 'Lr-quasipoisson'),
  nrow = 1)